################################################################################
## LOAD PACKAGES
################################################################################

library(ggplot2)
library(igraph)
library(ggraph)
library(patchwork)
library(dplyr)
library(readxl)
library(ggrepel)
library(tidygraph)
library(purrr)

################################################################################
## EXPLAINED + CUMULATIVE VARIANCE VISUALIZATION (EFA)
################################################################################

# Read only Ireland EFA variance data
ireland.efa.var.df <- read_excel("files/ireland_efa_var_df.xlsx")
ireland.efa.var.df$vaa <- "Ireland"

# Convert to percentage
ireland.efa.var.df$Proportion_Variance <- ireland.efa.var.df$Proportion_Variance * 100

# Calculate summary stats
variance.summary <- ireland.efa.var.df %>%
  summarise(
    avg_variance = mean(Proportion_Variance),
    cumulative_variance = sum(Proportion_Variance)
  )

# Merge for annotation
ireland.efa.var.df <- ireland.efa.var.df %>%
  mutate(
    avg_variance = variance.summary$avg_variance,
    cumulative_variance = variance.summary$cumulative_variance
  )

# Create png output
png("figs/ireland_efa_var.png", units = "in", width = 4, height = 4, res = 1200)

# Plot
ireland.efa.var.plot <- ggplot(ireland.efa.var.df, aes(x = Factor, y = Proportion_Variance, fill = Factor)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(aes(label = round(Proportion_Variance, 1)), 
            vjust = -0.3, size = 2.5, color = "black") +
  # Upper-right annotation
  geom_text(data = unique(ireland.efa.var.df), 
            aes(x = Inf, y = Inf, 
                label = paste0("Avg. var. = ", round(avg_variance, 1), "%\nCum. var. = ", round(cumulative_variance, 1), "%")), 
            inherit.aes = FALSE, size = 3, hjust = 1.05, vjust = 1.2) +
  scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, by = 5)) +
  labs(
    y = "Explained Variance (%)",
    x = "Factor",
    caption = paste0("Average explained variance = ", round(variance.summary$avg_variance, 1), "%\n",
                     "Cumulative variance = ", round(variance.summary$cumulative_variance, 1), "%")
  ) +
  theme_minimal(base_size = 10) +
  scale_fill_brewer(palette = "Set2") +
  ggtitle("(a) Explained Variance") +
  theme(
    axis.title.x = element_text(size = 9, margin = margin(t = 8)),
    axis.title.y = element_text(size = 9, margin = margin(r = 8)),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 11),
    plot.caption = element_text(size = 10, hjust = 0),
    panel.grid.major = element_line(size = 0.15, color = "gray90"),
    panel.grid.minor = element_line(size = 0.15, color = "gray90")
  )

ireland.efa.var.plot

dev.off()

################################################################################
## EXPLAINED + CUMULATIVE VARIANCE VISUALIZATION (IRT)
################################################################################

# Read only Ireland IRT variance data
ireland.irt.var.df <- read_excel("files/ireland_irt_var_df.xlsx")
ireland.irt.var.df$vaa <- "Ireland"

# Convert to percentage
ireland.irt.var.df$Proportion_Variance <- ireland.irt.var.df$Proportion_Variance * 100

# Calculate summary stats
variance.summary <- ireland.irt.var.df %>%
  summarise(
    avg_variance = mean(Proportion_Variance),
    cumulative_variance = sum(Proportion_Variance)
  )

# Merge for annotation
ireland.irt.var.df <- ireland.irt.var.df %>%
  mutate(
    avg_variance = variance.summary$avg_variance,
    cumulative_variance = variance.summary$cumulative_variance
  )

# Create png output
png("figs/ireland_irt_var.png", units = "in", width = 4, height = 4, res = 1200)

# Plot
ireland.irt.var.plot <- ggplot(ireland.irt.var.df, aes(x = Factor, y = Proportion_Variance, fill = Factor)) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  geom_text(aes(label = round(Proportion_Variance, 1)), 
            vjust = -0.3, size = 2.5, color = "black") +
  # Upper-right annotation
  geom_text(data = unique(ireland.irt.var.df), 
            aes(x = Inf, y = Inf, 
                label = paste0("Avg. var. = ", round(avg_variance, 1), "%\nCum. var. = ", round(cumulative_variance, 1), "%")), 
            inherit.aes = FALSE, size = 3, hjust = 1.05, vjust = 1.2) +
  scale_y_continuous(limits = c(0, 15), breaks = seq(0, 15, by = 5)) +
  labs(
    y = "Explained Variance (%)",
    x = "Factor",
    caption = paste0("Average explained variance = ", round(variance.summary$avg_variance, 1), "%\n",
                     "Cumulative variance = ", round(variance.summary$cumulative_variance, 1), "%")
  ) +
  theme_minimal(base_size = 10) +
  scale_fill_brewer(palette = "Set2") +
  ggtitle("(a) Explained Variance") +
  theme(
    axis.title.x = element_text(size = 9, margin = margin(t = 8)),
    axis.title.y = element_text(size = 9, margin = margin(r = 8)),
    axis.text = element_text(size = 8),
    plot.title = element_text(size = 11),
    plot.caption = element_text(size = 10, hjust = 0),
    panel.grid.major = element_line(size = 0.15, color = "gray90"),
    panel.grid.minor = element_line(size = 0.15, color = "gray90")
  )

ireland.irt.var.plot

dev.off()

################################################################################
## INTER-FACTOR CORRELATIONS (EFA)
################################################################################

# Load the data for ireland
ireland.efa.inter.df <- read_excel("files/ireland_efa_inter_correlations.xlsx")
ireland.efa.inter.df$vaa <- "Ireland"

# Create a list with only the `ireland` dataset
efa.matrices <- lapply(list(ireland.efa.inter.df), function(df) {
  df <- df %>%
    select(Factor1, Factor2, Correlation) %>%
    distinct()
  
  # Get full list of factors used
  factors_all <- sort(unique(c(df$Factor1, df$Factor2)))
  
  # Create an empty correlation matrix
  mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all),
                dimnames = list(factors_all, factors_all))
  
  # Fill matrix with correlations
  for (i in 1:nrow(df)) {
    f1 <- df$Factor1[i]
    f2 <- df$Factor2[i]
    val <- df$Correlation[i]
    mat[f1, f2] <- val
    mat[f2, f1] <- val  # Ensure symmetry
  }
  
  # Set diagonal to 1
  diag(mat) <- 1
  
  return(mat)
})

# Compute the average correlation for the plot
compute_avg_correlation <- function(mat) {
  vals <- abs(mat[lower.tri(mat)])
  mean(vals, na.rm = TRUE)
}

# Calculate average correlation for ireland
cor_avg_per_plot <- compute_avg_correlation(efa.matrices[[1]])

# Function to convert correlation matrix to dataframe
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))  # Convert to long format
  cor_df$Dataset <- dataset_name  # Add dataset name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]  # Remove self-correlations
  return(cor_df)
}

# Convert ireland matrix to data frame
cor_df_ireland <- cor_matrix_to_df(efa.matrices[[1]], "Ireland")

# Function to create network plot
make_graph_plot_from_df <- function(cor_df, dataset_name, color, avg_corr) {
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- create_layout(graph, layout = "kk")
  
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Adjust limits for the plot
  xlim_range <- range(layout$x) + c(-0.3, 0.3)
  ylim_range <- range(layout$y) + c(-0.5, 0.3)
  
  p <- ggraph(layout) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "gray"))),
                   alpha = 0.5) +
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 3, color = "black", segment.color = NA, max.overlaps = Inf) +
    geom_node_point(size = 7, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 3) +
    
    # Add mean correlation text
    annotate("text", 
             x = xlim_range[2] - 1.2, 
             y = ylim_range[1] + 0.4, 
             label = paste0("Mean abs. corr. = ", round(avg_corr, 2)),
             hjust = 1, vjust = 1., size = 3, fontface = "italic") +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.5, 5)) +
    scale_edge_color_identity() +
    
    theme_void() +
    theme(
      plot.title = element_text(size = 11, margin = margin(b = 0), hjust = 0.5),
      legend.position = "none"
    ) +
    
    labs(title = dataset_name)
  
  return(p)
}

# Create the plot for ireland
network.plot.efa.ireland <- make_graph_plot_from_df(cor_df_ireland, "(b) Inter-factor Correlations", "black", cor_avg_per_plot)

# Save the plot
ggsave("figs/network_plot_efa_ireland.png", network.plot.efa.ireland,
       width = 12, height = 12, dpi = 300)

################################################################################
## INTER-FACTOR CORRELATIONS (IRT)
################################################################################

library(tidyverse)
library(igraph)
library(ggraph)
library(ggrepel)
library(patchwork)
library(readxl)
library(purrr)

# -------------------------------
# Load the Ireland Data
# -------------------------------

ireland.irt.inter <- read_excel("files/ireland_irt_inter_correlations.xlsx")

# Add the 'vaa' column
ireland.irt.inter$vaa <- "Ireland"

# Convert the data to a correlation matrix
ireland.irt.df <- ireland.irt.inter %>%
  select(Factor1, Factor2, Correlation) %>%
  distinct()

# Get the full list of factors
factors_all <- sort(unique(c(ireland.irt.df$Factor1, ireland.irt.df$Factor2)))

# Create the correlation matrix
mat <- matrix(NA, nrow = length(factors_all), ncol = length(factors_all), dimnames = list(factors_all, factors_all))

# Fill the matrix with correlations
for (i in 1:nrow(ireland.irt.df)) {
  f1 <- ireland.irt.df$Factor1[i]
  f2 <- ireland.irt.df$Factor2[i]
  val <- ireland.irt.df$Correlation[i]
  mat[f1, f2] <- val
  mat[f2, f1] <- val  # Ensure symmetry
}

# Set diagonal to 1
diag(mat) <- 1

# -------------------------------
# Convert Correlation Matrix to DataFrame
# -------------------------------

# Function to convert the matrix to a data frame
cor_matrix_to_df <- function(cor_mat, dataset_name) {
  cor_df <- as.data.frame(as.table(cor_mat))  # Convert to long format
  cor_df$Dataset <- dataset_name  # Add dataset name
  colnames(cor_df) <- c("Factor1", "Factor2", "Correlation", "Dataset")
  cor_df <- cor_df[cor_df$Factor1 != cor_df$Factor2, ]  # Remove self-correlations
  return(cor_df)
}

# Convert the matrix for Ireland
cor_df_ireland <- cor_matrix_to_df(mat, "Ireland")

# -------------------------------
# Calculate Average Correlation
# -------------------------------

# Function to compute the average correlation
compute_avg_correlation <- function(mat) {
  vals <- abs(mat[lower.tri(mat)])
  mean(vals, na.rm = TRUE)
}

# Average correlation for Ireland
cor_avg_ireland <- compute_avg_correlation(mat)

# -------------------------------
# Create Network Plot
# -------------------------------

make_graph_plot_from_df <- function(cor_df, dataset_name, color, avg_corr) {
  # Remove duplicated edges (F1-F2 / F2-F1)
  cor_df <- cor_df %>%
    mutate(pair = pmap_chr(list(Factor1, Factor2), ~ paste(sort(c(.x, .y)), collapse = "-"))) %>%
    distinct(pair, .keep_all = TRUE) %>%
    select(-pair)
  
  # Create graph
  graph <- graph_from_data_frame(cor_df, directed = FALSE)
  E(graph)$Correlation <- cor_df$Correlation
  layout <- create_layout(graph, layout = "kk")
  
  # Prepare edge label positions
  edge_df <- as.data.frame(get.edgelist(graph))
  colnames(edge_df) <- c("from", "to")
  edge_df$Correlation <- E(graph)$Correlation
  
  edge_df <- edge_df %>%
    left_join(layout[, c("name", "x", "y")], by = c("from" = "name")) %>%
    rename(x_from = x, y_from = y) %>%
    left_join(layout[, c("name", "x", "y")], by = c("to" = "name")) %>%
    rename(x_to = x, y_to = y) %>%
    mutate(x_mid = (x_from + x_to) / 2,
           y_mid = (y_from + y_to) / 2)
  
  # Set the plot limits
  xlim_range <- range(layout$x) + c(-0.2, 0.2)
  ylim_range <- range(layout$y) + c(-0.2, 0.2)
  
  # Create plot
  p <- ggraph(layout) +
    geom_edge_link(aes(width = abs(Correlation),
                       color = ifelse(abs(Correlation) >= 0.5, "#45b6fe",
                                      ifelse(abs(Correlation) >= 0.3, "red", "gray"))),
                   alpha = 0.5) +
    geom_text_repel(data = edge_df,
                    aes(x = x_mid, y = y_mid, label = sprintf("%.2f", Correlation)),
                    size = 3, color = "black", segment.color = NA, max.overlaps = Inf) +
    geom_node_point(size = 7, color = color) +
    geom_node_text(aes(label = name), color = "white", size = 3) +
    
    # Add mean correlation annotation
    annotate("text", 
             x = xlim_range[2] - 1.2, 
             y = ylim_range[1] + 0.2, 
             label = paste0("Mean abs. corr. = ", round(avg_corr, 2)),
             hjust = 1, vjust = 1., size = 3, fontface = "italic") +
    
    xlim(xlim_range) +
    ylim(ylim_range) +
    
    scale_edge_width(limits = c(0, 0.6), range = c(0.5, 5)) +
    scale_edge_color_identity() +
    
    theme_void() +
    theme(
      plot.title = element_text(size = 11, margin = margin(b = 0), hjust = 0.5),
      legend.position = "none"
    ) +
    
    labs(title = dataset_name)
  
  return(p)
}

# Create the plot for Ireland
network.plot.irt.ireland <- make_graph_plot_from_df(cor_df_ireland, "(b) Inter-factor Correlations", "black", cor_avg_ireland)

# -------------------------------
# Save the Plot
# -------------------------------

# Save the network plot
ggsave("figs/network_plot_irt_ireland.png", network.plot.irt.ireland, width = 12, height = 12, dpi = 300)

############################################

# Combine the plots (side-by-side)
ireland.efa.combined.plot <- ireland.efa.var.plot + network.plot.efa.ireland
ireland.irt.combined.plot <- ireland.irt.var.plot + network.plot.irt.ireland

# Save to PNG
ggsave("figs/ireland_efa_combined_plot.png", ireland.efa.combined.plot, width = 10, height = 5, dpi = 300)
ggsave("figs/ireland_irt_combined_plot.png", ireland.irt.combined.plot, width = 10, height = 5, dpi = 300)